# =============================================================================
# FILE 3. DESCRIPTIVE STATISTICS
# =============================================================================
# PURPOSE:
#   Produce descriptive statistics and publication-ready figures comparing
#   citizen and politician expectations (seats + government participation) for
#   CF Flanders 2023 and 2024 survey waves.
#
# REPLICATION (QUICK START):
#   1) Update the setwd() path in Section 1 to your local project root.
#   2) Place the required input files in that root (or adjust file paths):
#        - Data_Citizens_2023.sav
#        - Data_Citizens_2024.sav
#        - Data_Politicians_2023.sav (if applicable; see load section below)
#        - Data_Politicians_2024.sav (if applicable; see load section below)
#   3) Run the script top-to-bottom in a fresh R session.
#
# EXPECTED OUTPUTS:
#   - In-memory data frames used for descriptives (wide + long versions).
#   - ggplot objects for the figures (some scripts also ggsave() to disk).
# =============================================================================

################################################################################
# 1. WORKING DIRECTORY + FILE LOCATIONS
################################################################################

getwd()

# Set project directory (update it to your own directory that contains the input files)
setwd("updated_path_here")

################################################################################
## 2. DESCRIPTIVES
################################################################################

# Set facet titles
seats_labels <- c(
  "PVDA" = "PVDA (+5 seats)",
  "Groen" = "Groen (–5 seats)",
  "Vooruit" = "Vooruit (+5 seats)",
  "CD&V" = "CD&V (–3 seats)",
  "Open Vld" = "Open Vld (–7 seats)",
  "N-VA" = "N-VA (–4 seats)",
  "Vlaams Belang" = "Vlaams Belang (+8 seats)"
)

# Set facet titles
govt_labels <- c(
  "PVDA" = "PVDA",
  "Groen" = "Groen",
  "Vooruit" = "Vooruit*",
  "CD&V" = "CD&V*",
  "Open Vld" = "Open Vld",
  "N-VA" = "N-VA*",
  "Vlaams Belang" = "Vlaams Belang"
)

################################################################################
## 2.1. CITIZEN SURVEYS
################################################################################
#
# Purpose:
# - Prepare and analyze citizen survey data (2023 & 2024) for descriptives/figures.
#
# Replication notes:
# - The script harmonizes question codes across waves into common variables.
# - Missing values are often encoded as -99 and are recoded to NA before analysis.
#

################################################################################
## 2.1.1. LOAD DATA (2023)
################################################################################
#
# Purpose:
# - Read the raw survey file for this wave/year into memory.
#
# Replication notes:
# - Ensure the .sav file is present in the working directory (or adjust the relative path).
# - haven::read_sav() preserves SPSS labels; subsequent steps may remove/convert labels.
# - After loading, the script standardizes column names/cases to reduce downstream surprises.
#

# Load data (2023)
cf.flanders.2023.data <- read_sav("Data_Citizens_2023.sav")        

# Lowercase column names (2023)
colnames(cf.flanders.2023.data) <- tolower(colnames(cf.flanders.2023.data))

################################################################################
## 2.1.2. LOAD DATA (2024)
################################################################################
#
# Purpose:
# - Read the raw survey file for this wave/year into memory.
#
# Replication notes:
# - Ensure the .sav file is present in the working directory (or adjust the relative path).
# - haven::read_sav() preserves SPSS labels; subsequent steps may remove/convert labels.
# - After loading, the script standardizes column names/cases to reduce downstream surprises.
#

# Load data (2024)
cf.flanders.2024.data <- read_sav("Data_Citizens_2024.sav")        

################################################################################
## 2.1.3. SEAT EXPECTATIONS
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

# Seat expectations (2023)
cf.flanders.2023.data$seats.pvda <- cf.flanders.2023.data$q24_1
cf.flanders.2023.data$seats.groen <- cf.flanders.2023.data$q24_2
cf.flanders.2023.data$seats.vooruit <- cf.flanders.2023.data$q24_3
cf.flanders.2023.data$seats.cdv <- cf.flanders.2023.data$q24_4
cf.flanders.2023.data$seats.ovld <- cf.flanders.2023.data$q24_5
cf.flanders.2023.data$seats.nva <- cf.flanders.2023.data$q24_6
cf.flanders.2023.data$seats.vb <- cf.flanders.2023.data$q24_7

# Seat expectations (2024)
cf.flanders.2024.data$seats.pvda <- cf.flanders.2024.data$Q24_4
cf.flanders.2024.data$seats.groen <- cf.flanders.2024.data$Q24_5
cf.flanders.2024.data$seats.vooruit <- cf.flanders.2024.data$Q24_6
cf.flanders.2024.data$seats.cdv <- cf.flanders.2024.data$Q24_7
cf.flanders.2024.data$seats.ovld <- cf.flanders.2024.data$Q24_9
cf.flanders.2024.data$seats.nva <- cf.flanders.2024.data$Q24_10
cf.flanders.2024.data$seats.vb <- cf.flanders.2024.data$Q24_11

################################################################################
## 2.1.4. GOVERNMENT EXPECTATIONS
################################################################################
#
# Purpose:
# - Create harmonized variables representing expectations about parties entering government.
#
# Replication notes:
# - Item names differ across 2023 vs 2024 (q25_* vs Q25_*).
# - Later, the script shifts some government variables by -1 to align scales across waves.
#

# Government expectations (2023)
cf.flanders.2023.data$govt.pvda <- cf.flanders.2023.data$q25_1
cf.flanders.2023.data$govt.groen <- cf.flanders.2023.data$q25_2
cf.flanders.2023.data$govt.vooruit <- cf.flanders.2023.data$q25_3
cf.flanders.2023.data$govt.cdv <- cf.flanders.2023.data$q25_4
cf.flanders.2023.data$govt.ovld <- cf.flanders.2023.data$q25_5
cf.flanders.2023.data$govt.nva <- cf.flanders.2023.data$q25_6
cf.flanders.2023.data$govt.vb <- cf.flanders.2023.data$q25_7

# Government expectations (2024)
cf.flanders.2024.data$govt.pvda <- cf.flanders.2024.data$Q25_4
cf.flanders.2024.data$govt.groen <- cf.flanders.2024.data$Q25_5
cf.flanders.2024.data$govt.vooruit <- cf.flanders.2024.data$Q25_6
cf.flanders.2024.data$govt.cdv <- cf.flanders.2024.data$Q25_7
cf.flanders.2024.data$govt.ovld <- cf.flanders.2024.data$Q25_9
cf.flanders.2024.data$govt.nva <- cf.flanders.2024.data$Q25_10
cf.flanders.2024.data$govt.vb <- cf.flanders.2024.data$Q25_11

################################################################################
## 2.1.5. CREATE POLITICAL INTEREST VARIABLE
################################################################################
#
# Purpose:
# - Construct a derived analysis variable used in descriptive tables/figures.
#
# Replication notes:
# - Pay attention to missing-value codes (e.g., -99) and labelled values.
# - If you adapt this script to new waves, confirm the source question codes first.
#

# Political interest: 3-category variable (2023 only)
cf.flanders.2023.data$interest3 <- NA
cf.flanders.2023.data$interest3[cf.flanders.2023.data$q22==1] <- 1
cf.flanders.2023.data$interest3[cf.flanders.2023.data$q22==2] <- 1
cf.flanders.2023.data$interest3[cf.flanders.2023.data$q22==3] <- 1
cf.flanders.2023.data$interest3[cf.flanders.2023.data$q22==4] <- 2
cf.flanders.2023.data$interest3[cf.flanders.2023.data$q22==5] <- 3
cf.flanders.2023.data$interest3[cf.flanders.2023.data$q22==-99] <- NA

# Political interest (3-category): Add value labels to answer categories (2023 only)
cf.flanders.2023.data$interest3 <- factor(cf.flanders.2023.data$interest3, 
                                          levels = c(1, 2, 3), 
                                          labels = c("Low", 
                                                     "Moderate", 
                                                     "High"))

# Political interest: Add empty column for 2024 dataset
cf.flanders.2024.data$interest3 <- NA

################################################################################
## 2.1.6. CREATE EDUCATION VARIABLE
################################################################################
#
# Purpose:
# - Construct a derived analysis variable used in descriptive tables/figures.
#
# Replication notes:
# - Pay attention to missing-value codes (e.g., -99) and labelled values.
# - If you adapt this script to new waves, confirm the source question codes first.
#

# Education (2024 only)
cf.flanders.2024.data$education <- cf.flanders.2024.data$Q2.4
cf.flanders.2024.data$education[cf.flanders.2024.data$Q2.4==-99] <- NA

# Education: Add value labels to answer categories (2024 only)
cf.flanders.2024.data$education <- factor(cf.flanders.2024.data$education, 
                                          levels = c(1, 2, 3, 4, 5, 6, 7, 8), 
                                          labels = c("Primary education", 
                                                     "Lower secondary education", 
                                                     "Upper secondary education",
                                                     "Graduate education",
                                                     "Professional bachelor's degree",
                                                     "Academic bachelor's degree",
                                                     "Master's degree",
                                                     "Doctoral degree"))

# Education: 3-category variable (2024 only)
cf.flanders.2024.data$education3 <- NA
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==1] <- 1
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==2] <- 1
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==3] <- 1
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==4] <- 2
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==5] <- 2
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==6] <- 3
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==7] <- 3
cf.flanders.2024.data$education3[cf.flanders.2024.data$Q2.4==8] <- 3

# Education (3-category): Add value labels to answer categories (2024 only)
cf.flanders.2024.data$education3 <- factor(cf.flanders.2024.data$education3, 
                                           levels = c(1, 2, 3), 
                                           labels = c("Low", 
                                                      "Moderate", 
                                                      "High"))

# Education: Add empty column for 2023 dataset
cf.flanders.2023.data$education3 <- NA

################################################################################
## 2.1.7. CREATE ATTENTION VARIABLE
################################################################################
#
# Purpose:
# - Construct a derived analysis variable used in descriptive tables/figures.
#
# Replication notes:
# - Pay attention to missing-value codes (e.g., -99) and labelled values.
# - If you adapt this script to new waves, confirm the source question codes first.
#

# Attention paid to campaign (2024 only)
cf.flanders.2024.data$follow <- cf.flanders.2024.data$Q15.7
cf.flanders.2024.data$follow[cf.flanders.2024.data$Q15.7==-99] <- NA
cf.flanders.2024.data$follow[cf.flanders.2024.data$Q15.7==1] <- 4
cf.flanders.2024.data$follow[cf.flanders.2024.data$Q15.7==2] <- 3
cf.flanders.2024.data$follow[cf.flanders.2024.data$Q15.7==3] <- 2
cf.flanders.2024.data$follow[cf.flanders.2024.data$Q15.7==4] <- 1

# Attention paid to campaign: Add value labels to answer categories (2024 only)
cf.flanders.2024.data$follow <- factor(cf.flanders.2024.data$follow, 
                                       levels = c(1, 2, 3, 4), labels = c("Not closely at all", 
                                                                          "Not so closely", 
                                                                          "Closely",
                                                                          "Very closely"))

# Attention paid to campaign: 3-category variable (2024 only)
cf.flanders.2024.data$follow3 <- cf.flanders.2024.data$Q15.7
cf.flanders.2024.data$follow3[cf.flanders.2024.data$Q15.7==-99] <- NA
cf.flanders.2024.data$follow3[cf.flanders.2024.data$Q15.7==1] <- 3
cf.flanders.2024.data$follow3[cf.flanders.2024.data$Q15.7==2] <- 2
cf.flanders.2024.data$follow3[cf.flanders.2024.data$Q15.7==3] <- 1
cf.flanders.2024.data$follow3[cf.flanders.2024.data$Q15.7==4] <- 1

# Attention paid to campaign (3-category): Add value labels to answer categories (2024 only)
cf.flanders.2024.data$follow3 <- factor(cf.flanders.2024.data$follow3, 
                                        levels = c(1, 2, 3), labels = c("Low", 
                                                                        "Moderate",
                                                                        "High"))

# Attention paid to campaign: Add empty column for 2023 dataset
cf.flanders.2023.data$follow3 <- NA

################################################################################
## 2.1.8. CREATE SOPHISTICATION INDEX
################################################################################
#
# Purpose:
# - Construct a derived analysis variable used in descriptive tables/figures.
#
# Replication notes:
# - Pay attention to missing-value codes (e.g., -99) and labelled values.
# - If you adapt this script to new waves, confirm the source question codes first.
#

# Sophistication index (2024 only)
cf.flanders.2024.data$follow_z <- scale(as.numeric(cf.flanders.2024.data$follow))
cf.flanders.2024.data$education_z <- scale(as.numeric(cf.flanders.2024.data$education))

cf.flanders.2024.data$sophistication_index <- rowMeans(
  cf.flanders.2024.data[, c("follow_z", "education_z")], na.rm = TRUE
)

# Sophistication index: Add empty column for 2023 dataset
cf.flanders.2023.data$sophistication_index <- NA

################################################################################
## 2.1.9. SURVEY YEAR
################################################################################

# Survey year (2023)
cf.flanders.2023.data$survey <- 2023

# Survey year (2024)
cf.flanders.2024.data$survey <- 2024

################################################################################
## 2.1.10. SELECT COLUMNS
################################################################################
#
# Purpose:
# - Keep only variables needed for the descriptive analyses and plots.
#
# Replication notes:
# - This reduces memory use and prevents accidental reliance on unused columns.
# - If you extend analyses, add variables here so they are retained post-subset().
#

# Select columns (2023)
cf.flanders.2023.data <- cf.flanders.2023.data %>% 
  subset(select=c("survey", "seats.pvda", "seats.groen", "seats.vooruit",
                  "seats.cdv", "seats.ovld", "seats.nva", "seats.vb",
                  "govt.pvda", "govt.groen", "govt.vooruit",
                  "govt.cdv", "govt.ovld", "govt.nva", "govt.vb",
                  "education3", "interest3", "follow3", "sophistication_index"))

# Select columns (2024)
cf.flanders.2024.data <- cf.flanders.2024.data %>% 
  subset(select=c("survey", "seats.pvda", "seats.groen", "seats.vooruit",
                  "seats.cdv", "seats.ovld", "seats.nva", "seats.vb",
                  "govt.pvda", "govt.groen", "govt.vooruit",
                  "govt.cdv", "govt.ovld", "govt.nva", "govt.vb",
                  "education3", "interest3", "follow3", "sophistication_index"))

################################################################################
## 2.1.11. REMOVE LABELS
################################################################################
#
# Purpose:
# - Convert labelled vectors to plain R types (often needed for plotting/modeling).
#
# Replication notes:
# - labelled data can behave unexpectedly in dplyr/ggplot; stripping labels improves stability.
#

# Remove labels (2023)
cf.flanders.2023.data <- remove_labels(cf.flanders.2023.data)

# Remove labels (2024)
cf.flanders.2024.data <- remove_labels(cf.flanders.2024.data)

################################################################################
## 2.1.12. MERGE 2023 AND 2024 DATAFRAMES
################################################################################
#
# Purpose:
# - Stack the two waves into a single long dataset for pooled descriptives.
#
# Replication notes:
# - rbind() assumes identical column names and compatible types.
# - If you add new variables in one wave, harmonize them in the other before merging.
#

# Merge 2023 and 2024 dataframes
cf.flanders.data <- rbind(cf.flanders.2023.data, cf.flanders.2024.data)

# Add survey type
cf.flanders.data$type <- "(a) According to Citizens:"

# Seat expectations: Remove NA values
cf.flanders.data <- cf.flanders.data %>%
  replace_with_na(replace = list(seats.pvda = c(-99),
                                 seats.groen = c(-99),
                                 seats.vooruit = c(-99),
                                 seats.cdv = c(-99),
                                 seats.ovld = c(-99),
                                 seats.nva = c(-99),
                                 seats.vb = c(-99),
                                 govt.pvda = c(-99),
                                 govt.groen = c(-99),
                                 govt.vooruit = c(-99),
                                 govt.cdv = c(-99),
                                 govt.ovld = c(-99),
                                 govt.nva = c(-99),
                                 govt.vb = c(-99)))

cf.flanders.data$govt.pvda <- cf.flanders.data$govt.pvda - 1
cf.flanders.data$govt.groen <- cf.flanders.data$govt.groen - 1
cf.flanders.data$govt.vooruit <- cf.flanders.data$govt.vooruit - 1
cf.flanders.data$govt.cdv <- cf.flanders.data$govt.cdv - 1
cf.flanders.data$govt.ovld <- cf.flanders.data$govt.ovld - 1
cf.flanders.data$govt.nva <- cf.flanders.data$govt.nva - 1
cf.flanders.data$govt.vb <- cf.flanders.data$govt.vb - 1

################################################################################
## 2.1.13. SEAT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

# Create dataframe for seat expectations and drop NA values for seat expectations
cf.flanders.data.seats <- cf.flanders.data %>% drop_na(c(seats.pvda,seats.groen,
                                                         seats.vooruit,seats.cdv,
                                                         seats.ovld,seats.nva,
                                                         seats.vb,govt.pvda))

# Reshape from wide to long
cf.flanders.seats.long <- gather(cf.flanders.data.seats, party, seats, 
                                 seats.pvda:seats.vb, factor_key=TRUE)

# Remove NA values
cf.flanders.seats.long <- cf.flanders.seats.long %>% filter(!is.na(seats))
cf.flanders.seats.long <- cf.flanders.seats.long %>% filter(!is.na(party))

# Set levels and add value labels for new party column
cf.flanders.seats.long$party <- factor(cf.flanders.seats.long$party,
                                       levels = c('seats.pvda', 'seats.groen', 'seats.vooruit',
                                                  'seats.cdv', 'seats.ovld', 'seats.nva',
                                                  'seats.vb'), 
                                       labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                  "Open Vld", "N-VA", "Vlaams Belang"))

# Add value labels to answer categories for seats column
cf.flanders.seats.long$seats <- factor(cf.flanders.seats.long$seats,
                                       levels = c('13', '12', '1'), 
                                       labels = c("Fewer", "Same", "More"))

# Add new column for correct forecasts

cf.flanders.seats.long$correct <- NA

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="PVDA" & cf.flanders.seats.long$seats=="More"] <- "Correct"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="PVDA" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="PVDA" & cf.flanders.seats.long$seats=="Fewer"] <- "Incorrect"

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Groen" & cf.flanders.seats.long$seats=="More"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Groen" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Groen" & cf.flanders.seats.long$seats=="Fewer"] <- "Correct"

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Vooruit" & cf.flanders.seats.long$seats=="More"] <- "Correct"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Vooruit" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Vooruit" & cf.flanders.seats.long$seats=="Fewer"] <- "Incorrect"

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="CD&V" & cf.flanders.seats.long$seats=="More"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="CD&V" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="CD&V" & cf.flanders.seats.long$seats=="Fewer"] <- "Correct"

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Open Vld" & cf.flanders.seats.long$seats=="More"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Open Vld" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Open Vld" & cf.flanders.seats.long$seats=="Fewer"] <- "Correct"

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="N-VA" & cf.flanders.seats.long$seats=="More"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="N-VA" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="N-VA" & cf.flanders.seats.long$seats=="Fewer"] <- "Correct"

cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Vlaams Belang" & cf.flanders.seats.long$seats=="More"] <- "Correct"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Vlaams Belang" & cf.flanders.seats.long$seats=="Same"] <- "Incorrect"
cf.flanders.seats.long$correct[cf.flanders.seats.long$party=="Vlaams Belang" & cf.flanders.seats.long$seats=="Fewer"] <- "Incorrect"

################################################################################
## 2.1.14. FIGURE: CHANGE IN SEATS ACCORDING TO CITIZENS (2023 AND 2024)
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("cf_seats.png", units="in", width=10, height=7, res=1200)

cf.seats.g <- cf.flanders.seats.long %>% ggplot(aes(x=as.factor(seats), fill=as.factor(party)))+
  geom_bar(aes(y=..count../tapply(..count.., ..fill.., sum)[..fill..]), 
           position="dodge", alpha=.25, data = . %>% filter(survey == 2023), width = 0.9, show.legend = FALSE) +
  geom_bar(aes(y=..count../tapply(..count.., ..fill.., sum)[..fill..]), 
           position="dodge", data = . %>% filter(survey == 2024), width = 0.6, show.legend = FALSE) +
  ylab('% Respondents') +
  scale_y_continuous(limits=c(0,1), 
                     breaks=seq(0,1,.25), labels = scales::percent) + facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  scale_x_discrete("More or Fewer Seats?") +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + theme_bw()
#labs(caption = "\n2023 survey (semi-transparent bars): n=879. 2024 survey (solid bars): n=856.\n\nQuestion: Regardless of your own political affiliation, can you say for each of the parties below how likely you think it is that\nthey will have more or fewer seats in the Flemish Parliament after the next elections compared to the previous elections?")

cf.seats.g + 
  theme(axis.title.x=element_text(margin=margin(t=8, r=0, b=0, l=0), size = 13),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 13),
        axis.text.x=element_text(size = 12),
        axis.text.y=element_text(size = 12),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 12, hjust = 0, face = "italic"),
        plot.title = element_blank())

dev.off()

################################################################################
## 2.1.15. FIGURE: CHANGE IN SEATS ACCORDING TO CITIZENS BY EDUCATION LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Set levels for education (3-category) variable
cf.flanders.seats.long$education3 <- factor(cf.flanders.seats.long$education3,
                                            levels = c("Low", "Moderate", "High"))

# Compute percentage of seat expectations for each party by education level
cf.pct.by.educ <- cf.flanders.seats.long %>%
  dplyr::filter(!is.na(seats), !is.na(education3), !is.na(party)) %>%
  dplyr::group_by(party, education3, seats) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  dplyr::group_by(party, education3) %>%
  dplyr::mutate(percentage = count / sum(count)) %>%
  ungroup()

# Create figure

png("cf_seats_by_educ.png", units="in", width=10, height=7, res=1200)

ggplot(cf.pct.by.educ, aes(x = seats, y = percentage, fill = education3)) +
  geom_col(position = "dodge", width = 0.8) +
  facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_manual(
    values = c(
      "Low" = "gray80",
      "Moderate" = "gray40",
      "High" = "black"
    )
  ) +
  labs(
    x = "More or Fewer Seats?",
    y = "% Respondents",
    fill = "Education Level") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")
  )

dev.off()

################################################################################
## 2.1.16. FIGURE: CHANGE IN SEATS ACCORDING TO CITIZENS BY INTEREST LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Set levels for political interest (3-category) variable
cf.flanders.seats.long$interest3 <- factor(cf.flanders.seats.long$interest3,
                                           levels = c("Low", "Moderate", "High"))

# Compute percentage of seat expectations for each party by interest level
cf.pct.by.interest <- cf.flanders.seats.long %>%
  dplyr::filter(!is.na(seats), !is.na(interest3), !is.na(party)) %>%
  dplyr::group_by(party, interest3, seats) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  dplyr::group_by(party, interest3) %>%
  dplyr::mutate(percentage = count / sum(count)) %>%
  ungroup()

# Create figure

png("cf_seats_by_interest.png", units="in", width=10, height=7, res=1200)

ggplot(cf.pct.by.interest, aes(x = seats, y = percentage, fill = interest3)) +
  geom_col(position = "dodge", width = 0.8) +
  facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_manual(
    values = c(
      "Low" = "gray80",
      "Moderate" = "gray40",
      "High" = "black"
    )
  ) +
  labs(
    x = "More or Fewer Seats?",
    y = "% Respondents",
    fill = "Interest Level") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")
  )

dev.off()

################################################################################
## 2.1.17. FIGURE: CHANGE IN SEATS ACCORDING TO CITIZENS BY ATTENTION LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Set levels for attention (3-category) variable
cf.flanders.seats.long$follow3 <- factor(cf.flanders.seats.long$follow3,
                                         levels = c("Low", "Moderate", "High"))

# Compute percentage of seat expectations for each party by attention level
cf.pct.by.attention <- cf.flanders.seats.long %>%
  dplyr::filter(!is.na(seats), !is.na(follow3), !is.na(party)) %>%
  dplyr::group_by(party, follow3, seats) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  dplyr::group_by(party, follow3) %>%
  dplyr::mutate(percentage = count / sum(count)) %>%
  ungroup()

# Create figure

png("cf_seats_by_attention.png", units="in", width=10, height=7, res=1200)

ggplot(cf.pct.by.attention, aes(x = seats, y = percentage, fill = follow3)) +
  geom_col(position = "dodge", width = 0.8) +
  facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_manual(
    values = c("Low" = "gray80", 
               "Moderate" = "gray40", 
               "High" = "black"
    )
  ) +
  labs(
    x = "More or Fewer Seats?",
    y = "% Respondents",
    fill = "Attention Level") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")
  )

dev.off()

################################################################################
## 2.1.18. FIGURE: DISTRIBUTION OF POLITICAL SOPHISTICATION BY SEAT EXPECTATIONS 
## ACROSS PARTIES
################################################################################

# Calculate stats per group
#cf_stats <- cf.flanders.seats.long %>%
#filter(is.finite(sophistication_index)) %>%
#dplyr::group_by(party, seats) %>%
#dplyr::summarise(
#mean_si = mean(sophistication_index, na.rm = TRUE),
#skew_si = skewness(sophistication_index, na.rm = TRUE),
#kurt_si = kurtosis(sophistication_index, na.rm = TRUE),
#.groups = 'drop'
#)

png("cf_seats_index.png", units = "in", width = 10, height = 7, res = 1200)

ggplot(
  data = cf.flanders.seats.long %>% filter(is.finite(sophistication_index)),
  aes(x = sophistication_index, y = seats, fill = seats)
) +
  geom_density_ridges(alpha = 0.8, scale = 1.2, show.legend = FALSE) +
  # Add mean lines
  #geom_vline(
  #data = cf_stats,
  #aes(xintercept = mean_si, y = seats),
  #linetype = "dashed", color = "black", size = 0.4, inherit.aes = FALSE
  #) +
  # Optionally add skewness/kurtosis labels
  #geom_text(
  #data = cf_stats,
  #aes(
  #x = mean_si,
  #y = seats,
  #label = paste0("μ=", round(mean_si, 2), 
  #"\nsk=", round(skew_si, 2),
  #"\nku=", round(kurt_si, 2))
  #),
  #hjust = -0.1, vjust = 0.5, size = 2.7, inherit.aes = FALSE
  #)
  facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  coord_cartesian(xlim = c(-2.1, 1.6)) +
  labs(
    x = "Sophistication Index",
    y = "Seat Expectation"
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank()
  )

dev.off()

png("cf_seats_index_correct.png", units = "in", width = 10, height = 7, res = 1200)

ggplot(
  data = cf.flanders.seats.long %>% filter(is.finite(sophistication_index)),
  aes(x = sophistication_index, y = correct, fill = correct)
) +
  geom_density_ridges(alpha = 0.8, scale = 1.2, show.legend = FALSE) +
  # Add mean lines
  #geom_vline(
  #data = cf_stats,
  #aes(xintercept = mean_si, y = seats),
  #linetype = "dashed", color = "black", size = 0.4, inherit.aes = FALSE
  #) +
  # Optionally add skewness/kurtosis labels
  #geom_text(
  #data = cf_stats,
  #aes(
  #x = mean_si,
  #y = seats,
  #label = paste0("μ=", round(mean_si, 2), 
  #"\nsk=", round(skew_si, 2),
  #"\nku=", round(kurt_si, 2))
  #),
  #hjust = -0.1, vjust = 0.5, size = 2.7, inherit.aes = FALSE
  #)
  facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  coord_cartesian(xlim = c(-2.1, 1.6)) +
  labs(
    x = "Sophistication Index",
    y = "Seat Expectation"
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank()
  )

dev.off()

################################################################################
## 2.1.19. GOVERNMENT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expectations about parties entering government.
#
# Replication notes:
# - Item names differ across 2023 vs 2024 (q25_* vs Q25_*).
# - Later, the script shifts some government variables by -1 to align scales across waves.
#

# Create dataframe for government expectations
cf.flanders.data.govt <- cf.flanders.data %>% drop_na(c(govt.pvda,
                                                        govt.groen,govt.vooruit,
                                                        govt.cdv,govt.ovld,
                                                        govt.nva,govt.vb))

# Reshape from wide to long
cf.flanders.govt.long <- gather(cf.flanders.data.govt, party, govt, govt.pvda:govt.vb, factor_key=TRUE)

# Remove NA values
cf.flanders.govt.long <- cf.flanders.govt.long %>% filter(!is.na(govt))
cf.flanders.govt.long <- cf.flanders.govt.long %>% filter(!is.na(party))

# Set levels and add value labels for new party column
cf.flanders.govt.long$party <- factor(cf.flanders.govt.long$party,
                                      levels = c('govt.pvda', 'govt.groen', 'govt.vooruit',
                                                 'govt.cdv', 'govt.ovld', 'govt.nva',
                                                 'govt.vb'), 
                                      labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                 "Open Vld", "N-VA", "Vlaams Belang"))

################################################################################
## 2.1.20. FIGURE: PROB. OF BEING IN GOVT ACCORDING TO CITIZENS (2023 AND 2024)
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("cf_govt.png", units="in", width=10, height=5, res=1200)

cf.govt.g <- cf.flanders.govt.long %>% ggplot(aes(x=reorder(party, govt), y=govt, fill=party)) + 
  geom_bar(stat="summary", fun="mean", alpha=.25, data = . %>% filter(survey == 2023), width = 0.9, show.legend = FALSE) +
  geom_bar(stat="summary", fun="mean", data = . %>% filter(survey == 2024), width = 0.6, show.legend = FALSE) +
  scale_y_continuous("Average Probability", breaks = seq(0,10,2)) +
  scale_x_discrete("") +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + theme_bw()

cf.govt.g + 
  theme(axis.title.x=element_blank(),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 15),
        axis.text.x=element_text(size = 14),
        axis.text.y=element_text(size = 14),
        plot.title = element_blank())

dev.off()

################################################################################
## 2.1.21. FIGURE: AVERAGE GOVT PROB. ACCORDING TO CITIZENS BY EDUCATION LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Compute average expected probability of party being in government by education level
cf.avg.govt.educ <- cf.flanders.govt.long %>%
  dplyr::filter(!is.na(govt), !is.na(education3), !is.na(party)) %>%
  dplyr::group_by(party, education3) %>%
  dplyr::summarize(mean_govt = mean(govt), .groups = "drop")

cf.avg.govt.educ$education3 <- factor(cf.avg.govt.educ$education3,
                                      levels = c("Low", "Moderate", "High"))

# Create figure

png("cf_govt_by_educ.png", units = "in", width = 10, height = 7, res = 1200)

ggplot(cf.avg.govt.educ, aes(x = education3, y = mean_govt, fill = education3)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ party, ncol = 4, labeller = labeller(party = govt_labels)) +
  labs(
    x = "Education Level",
    y = "Average Probability"
  ) +
  scale_fill_manual(
    values = c("Low" = "gray80", "Moderate" = "gray40", "High" = "black")
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "none",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")  # or smaller if needed
  )

dev.off()

################################################################################
## 2.1.22. FIGURE: AVERAGE GOVT PROB. ACCORDING TO CITIZENS BY INTEREST LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Compute average expected probability of party being in government by political interest level
cf.avg.govt.interest <- cf.flanders.govt.long %>%
  dplyr::filter(!is.na(govt), !is.na(interest3), !is.na(party)) %>%
  dplyr::group_by(party, interest3) %>%
  dplyr::summarize(mean_govt = mean(govt), .groups = "drop")

# Create figure

png("cf_govt_by_interest.png", units = "in", width = 10, height = 7, res = 1200)

ggplot(cf.avg.govt.interest, aes(x = interest3, y = mean_govt, fill = interest3)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ party, ncol = 4, labeller = labeller(party = govt_labels)) +
  labs(
    x = "Interest Level",
    y = "Average Probability"
  ) +
  scale_fill_manual(
    values = c("Low" = "gray80", "Moderate" = "gray40", "High" = "black")
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "none",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")  # or smaller if needed
  )

dev.off()

################################################################################
## 2.1.23. FIGURE: AVERAGE GOVT PROB. ACCORDING TO CITIZENS BY ATTENTION LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Set levels for attention (3-category) variable
cf.flanders.govt.long$follow3 <- factor(cf.flanders.govt.long$follow3,
                                        levels = c("Low", "Moderate", "High"))

# Compute average expected probability of party being in government by attention level
cf.avg.govt.attention <- cf.flanders.govt.long %>%
  dplyr::filter(!is.na(govt), !is.na(follow3), !is.na(party)) %>%
  dplyr::group_by(party, follow3) %>%
  dplyr::summarize(mean_govt = mean(govt), .groups = "drop")

# Create figure

png("cf_govt_by_attention.png", units = "in", width = 10, height = 7, res = 1200)

ggplot(cf.avg.govt.attention, aes(x = follow3, y = mean_govt, fill = follow3)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ party, ncol = 4, labeller = labeller(party = govt_labels)) +
  labs(
    x = "Attention Level",
    y = "Average Probability"
  ) +
  scale_fill_manual(
    values = c("Low" = "gray80", "Moderate" = "gray40", "High" = "black")
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "none",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")  # or smaller if needed
  )

dev.off()

################################################################################
## 2.1.24. FIGURE: SCATTERPLOT EXPECTED PROBABILITY BY SOPHISTICATION
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("cf_govt_index.png", units = "in", width = 10, height = 5, res = 1200)

ggplot(
  cf.flanders.govt.long %>% filter(!is.na(sophistication_index), !is.na(govt)),
  aes(x = sophistication_index, y = govt, color = party)
) +
  geom_jitter(alpha = 0.3, width = 0.1, height = 0.02, color = "gray40") +
  geom_smooth(method = "loess", se = FALSE, linewidth = 1) +
  facet_wrap(~ party, ncol = 4, labeller = labeller(party = govt_labels)) +
  scale_color_manual(values = c(
    "PVDA" = "#990000",
    "Groen" = "#04847A",
    "Vooruit" = "#FF0000",
    "CD&V" = "#FF6000",
    "Open Vld" = "#005DAA",
    "N-VA" = "#FCBD1B",
    "Vlaams Belang" = "black"
  )) +
  labs(
    x = "Sophistication Index",
    y = "Probability"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.title.x = element_text(size = 14, margin = margin(t = 12)),
    axis.title.y = element_text(size = 14, margin = margin(r = 12)),
    axis.text = element_text(size = 12),
    strip.text = element_text(size = 13)  # no bold, smaller size
  )

dev.off()

################################################################################
## 2.2. POLITICIAN SURVEYS
################################################################################
#
# Purpose:
# - Prepare and analyze politician survey data (2023 & 2024) for descriptives/figures.
#
# Replication notes:
# - Ensure politician survey files are available and named as expected in the load step.
# - The 'type' field is used later to combine citizen vs politician outputs in merged plots.
#

################################################################################
## 2.2.1. LOAD DATA (2023)
################################################################################
#
# Purpose:
# - Read the raw survey file for this wave/year into memory.
#
# Replication notes:
# - Ensure the .sav file is present in the working directory (or adjust the relative path).
# - haven::read_sav() preserves SPSS labels; subsequent steps may remove/convert labels.
# - After loading, the script standardizes column names/cases to reduce downstream surprises.
#

# Load data (2023)
pf.flanders.2023.data <- read.dta("Data_Politicians_2023.dta")

################################################################################
## 2.2.2. LOAD DATA (2024)
################################################################################
#
# Purpose:
# - Read the raw survey file for this wave/year into memory.
#
# Replication notes:
# - Ensure the .sav file is present in the working directory (or adjust the relative path).
# - haven::read_sav() preserves SPSS labels; subsequent steps may remove/convert labels.
# - After loading, the script standardizes column names/cases to reduce downstream surprises.
#

pf.flanders.2024.data <- read_dta('Data_Politicians_2024.dta')

################################################################################
## 2.2.3. SEAT EXPECTATIONS
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

# Seat expectations (2023)
pf.flanders.2023.data$seats.pvda <- pf.flanders.2023.data$Q9_1_1
pf.flanders.2023.data$seats.groen <- pf.flanders.2023.data$Q9_1_2
pf.flanders.2023.data$seats.vooruit <- pf.flanders.2023.data$Q9_1_3
pf.flanders.2023.data$seats.cdv <- pf.flanders.2023.data$Q9_1_4
pf.flanders.2023.data$seats.ovld <- pf.flanders.2023.data$Q9_1_5
pf.flanders.2023.data$seats.nva <- pf.flanders.2023.data$Q9_1_6
pf.flanders.2023.data$seats.vb <- pf.flanders.2023.data$Q9_1_7

# Seat expectations (2024)
pf.flanders.2024.data$seats.pvda <- pf.flanders.2024.data$Q1_4
pf.flanders.2024.data$seats.groen <- pf.flanders.2024.data$Q1_5
pf.flanders.2024.data$seats.vooruit <- pf.flanders.2024.data$Q1_6
pf.flanders.2024.data$seats.cdv <- pf.flanders.2024.data$Q1_7
pf.flanders.2024.data$seats.ovld <- pf.flanders.2024.data$Q1_9
pf.flanders.2024.data$seats.nva <- pf.flanders.2024.data$Q1_10
pf.flanders.2024.data$seats.vb <- pf.flanders.2024.data$Q1_11

################################################################################
## 2.2.4. GOVERNMENT EXPECTATIONS
################################################################################
#
# Purpose:
# - Create harmonized variables representing expectations about parties entering government.
#
# Replication notes:
# - Item names differ across 2023 vs 2024 (q25_* vs Q25_*).
# - Later, the script shifts some government variables by -1 to align scales across waves.
#

# Government expectations (2023)
pf.flanders.2023.data$govt.pvda <- pf.flanders.2023.data$Q9_2_1
pf.flanders.2023.data$govt.groen <- pf.flanders.2023.data$Q9_2_2
pf.flanders.2023.data$govt.vooruit <- pf.flanders.2023.data$Q9_2_3
pf.flanders.2023.data$govt.cdv <- pf.flanders.2023.data$Q9_2_4
pf.flanders.2023.data$govt.ovld <- pf.flanders.2023.data$Q9_2_5
pf.flanders.2023.data$govt.nva <- pf.flanders.2023.data$Q9_2_6
pf.flanders.2023.data$govt.vb <- pf.flanders.2023.data$Q9_2_7

# Government expectations (2024)
pf.flanders.2024.data$govt.pvda <- pf.flanders.2024.data$Q2_4
pf.flanders.2024.data$govt.groen <- pf.flanders.2024.data$Q2_5
pf.flanders.2024.data$govt.vooruit <- pf.flanders.2024.data$Q2_6
pf.flanders.2024.data$govt.cdv <- pf.flanders.2024.data$Q2_7
pf.flanders.2024.data$govt.ovld <- pf.flanders.2024.data$Q2_9
pf.flanders.2024.data$govt.nva <- pf.flanders.2024.data$Q2_10
pf.flanders.2024.data$govt.vb <- pf.flanders.2024.data$Q2_11

################################################################################
## 2.2.5. PARTY AFFILIATION
################################################################################

# Party affiliation: Many party IDs? (2023)
pf.flanders.2023.data$manyids <- apply(pf.flanders.2023.data[, c("Q3_2_1", "Q3_2_2", "Q3_2_3", "Q3_2_4", 
                                                                 "Q3_2_5", "Q3_2_6", "Q3_2_7", "Q3_2_8",
                                                                 "Q3_2_9")], 1, function(x) sum(x == 1) > 1)

# Party affiliation: Convert party ID columns to character (2023)
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  mutate_at(vars("Q3_2_1", "Q3_2_2", "Q3_2_3", "Q3_2_4", "Q3_2_5", "Q3_2_6", "Q3_2_7", "Q3_2_8"),
            as.character)

# Party affiliation: Remove -99 values and recode (2023)
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(Q3_2_1 = recode(Q3_2_1, "1" = "CD&V", "-99" = ""),
                Q3_2_2 = recode(Q3_2_2, "1" = "Groen", "-99" = ""),
                Q3_2_3 = recode(Q3_2_3, "1" = "N-VA", "-99" = ""),
                Q3_2_4 = recode(Q3_2_4, "1" = "Open Vld", "-99" = ""),
                Q3_2_5 = recode(Q3_2_5, "1" = "PVDA", "-99" = ""),
                Q3_2_6 = recode(Q3_2_6, "1" = "Vooruit", "-99" = ""),
                Q3_2_7 = recode(Q3_2_7, "1" = "Vlaams Belang", "-99" = ""),
                Q3_2_8 = recode(Q3_2_8, "1" = "Other/Independent", "-99" = ""),
                Q3_2_9 = recode(Q3_2_9, "1" = "Other/Independent", "-99" = "")
  )

# Party affiliation: Concatenate values from party IDs columns in a single column (2023)
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(affiliation2 = apply(pf.flanders.2023.data[, c("Q3_2_1", "Q3_2_2", "Q3_2_3", "Q3_2_4", "Q3_2_5", "Q3_2_6", "Q3_2_7", "Q3_2_8")], 1, function(x) {
    # Remove NA and empty strings, then concatenate non-empty values
    paste(na.omit(x[x != ""]), collapse = ", ")
  }))


# Party affiliation: Create party affiliation column (2023)
pf.flanders.2023.data$affiliation <- pf.flanders.2023.data$affiliation2

# Party affiliation: Replace multiple party ids by NAs (2023)
pf.flanders.2023.data <- pf.flanders.2023.data %>% 
  mutate(affiliation = ifelse(manyids == TRUE, NA, affiliation))

# Party affiliation: Add value labels to answer categories (2023)
pf.flanders.2023.data$affiliation <- factor(pf.flanders.2023.data$affiliation,
                                            levels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                       "Open Vld", "N-VA", "Vlaams Belang",
                                                       "Other/Independent"), 
                                            labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                       "Open Vld", "N-VA", "Vlaams Belang",
                                                       "Other/Independent"))

# Party affiliation (2024)
pf.flanders.2024.data$affiliation <- ""
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==2] <- "CD&V"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==3] <- "Groen"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==5] <- "N-VA"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==4] <- "Open Vld"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==8] <- "PVDA"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==1] <- "Vooruit"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==6] <- "Vlaams Belang"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==7] <- "Other/Independent"
pf.flanders.2024.data$affiliation[pf.flanders.2024.data$Q26==9] <- "Other/Independent"

# Party affiliation: Add value labels for answer categories (2024)
pf.flanders.2024.data$affiliation <- factor(pf.flanders.2024.data$affiliation,
                                            levels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                       "Open Vld", "N-VA", "Vlaams Belang",
                                                       "Other/Independent"), 
                                            labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                       "Open Vld", "N-VA", "Vlaams Belang",
                                                       "Other/Independent"))

################################################################################
## 2.2.6. EDUCATION
################################################################################

# Education: 3-category variable (2023 only)
pf.flanders.2023.data$education3 <- as.numeric(pf.flanders.2023.data$Q4_3)
pf.flanders.2023.data$education3 <- NA
pf.flanders.2023.data$education3[pf.flanders.2023.data$Q4_3==1] <- 1
pf.flanders.2023.data$education3[pf.flanders.2023.data$Q4_3==2] <- 1
pf.flanders.2023.data$education3[pf.flanders.2023.data$Q4_3==3] <- 2
pf.flanders.2023.data$education3[pf.flanders.2023.data$Q4_3==4] <- 3
pf.flanders.2023.data$education3[pf.flanders.2023.data$Q4_3==-99] <- NA

# Education (3-category): Add value labels for answer categories (2023 only)
pf.flanders.2023.data$education3 <- factor(pf.flanders.2023.data$education3, 
                                           levels = c(1, 2, 3), 
                                           labels = c("Low", "Moderate", "High"))

# Education (3-category): Add empty column for 2024 dataframe
pf.flanders.2024.data$education3 <- NA

################################################################################
## 2.2.7. SURVEY YEAR
################################################################################

# Survey year (2023)
pf.flanders.2023.data$survey <- 2023

# Survey year (2024)
pf.flanders.2024.data$survey <- 2024

################################################################################
## 2.2.8. SELECT COLUMNS
################################################################################
#
# Purpose:
# - Keep only variables needed for the descriptive analyses and plots.
#
# Replication notes:
# - This reduces memory use and prevents accidental reliance on unused columns.
# - If you extend analyses, add variables here so they are retained post-subset().
#

# Select columns (2023)
pf.flanders.2023.data <- pf.flanders.2023.data %>% 
  subset(select=c("survey", "seats.pvda", "seats.groen", "seats.vooruit",
                  "seats.cdv", "seats.ovld", "seats.nva", "seats.vb",
                  "govt.pvda", "govt.groen", "govt.vooruit",
                  "govt.cdv", "govt.ovld", "govt.nva", "govt.vb", 
                  "affiliation", "education3"))

# Select columns (2024)
pf.flanders.2024.data <- pf.flanders.2024.data %>% 
  subset(select=c("survey", "seats.pvda", "seats.groen", "seats.vooruit",
                  "seats.cdv", "seats.ovld", "seats.nva", "seats.vb",
                  "govt.pvda", "govt.groen", "govt.vooruit",
                  "govt.cdv", "govt.ovld", "govt.nva", "govt.vb", 
                  "affiliation", "education3"))

################################################################################
## 2.2.9. MERGE 2023 AND 2024 DATAFRAMES
################################################################################
#
# Purpose:
# - Stack the two waves into a single long dataset for pooled descriptives.
#
# Replication notes:
# - rbind() assumes identical column names and compatible types.
# - If you add new variables in one wave, harmonize them in the other before merging.
#

# Merge 2023 and 2024 dataframes
pf.flanders.data <- rbind(pf.flanders.2023.data, pf.flanders.2024.data)

# Add survey type
pf.flanders.data$type <- "(b) According to Politicians:"

# Seat expectations: Remove NA values
pf.flanders.data <- pf.flanders.data %>%
  replace_with_na(replace = list(seats.pvda = c(-99),
                                 seats.groen = c(-99),
                                 seats.vooruit = c(-99),
                                 seats.cdv = c(-99),
                                 seats.ovld = c(-99),
                                 seats.nva = c(-99),
                                 seats.vb = c(-99),
                                 govt.pvda = c(-99),
                                 govt.groen = c(-99),
                                 govt.vooruit = c(-99),
                                 govt.cdv = c(-99),
                                 govt.ovld = c(-99),
                                 govt.nva = c(-99),
                                 govt.vb = c(-99)))

pf.flanders.data$govt.pvda <- pf.flanders.data$govt.pvda - 1
pf.flanders.data$govt.groen <- pf.flanders.data$govt.groen - 1
pf.flanders.data$govt.vooruit <- pf.flanders.data$govt.vooruit - 1
pf.flanders.data$govt.cdv <- pf.flanders.data$govt.cdv - 1
pf.flanders.data$govt.ovld <- pf.flanders.data$govt.ovld - 1
pf.flanders.data$govt.nva <- pf.flanders.data$govt.nva - 1
pf.flanders.data$govt.vb <- pf.flanders.data$govt.vb - 1

################################################################################
## 2.2.10. SEAT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

pf.flanders.data.seats <- pf.flanders.data %>% drop_na(c(seats.pvda,seats.groen,
                                                         seats.vooruit,seats.cdv,
                                                         seats.ovld,seats.nva,
                                                         seats.vb,govt.pvda))

pf.flanders.data.govt <- pf.flanders.data %>% drop_na(c(govt.pvda,
                                                        govt.groen,govt.vooruit,
                                                        govt.cdv,govt.ovld,
                                                        govt.nva,govt.vb))

# Reshape from wide to long (seats)

pf.flanders.seats.long <- gather(pf.flanders.data.seats, party, seats, seats.pvda:seats.vb, factor_key=TRUE)

pf.flanders.seats.long <- pf.flanders.seats.long %>% filter(!is.na(seats))
pf.flanders.seats.long <- pf.flanders.seats.long %>% filter(!is.na(party))

pf.flanders.seats.long$party <- factor(pf.flanders.seats.long$party,
                                       levels = c('seats.pvda', 'seats.groen', 'seats.vooruit',
                                                  'seats.cdv', 'seats.ovld', 'seats.nva',
                                                  'seats.vb'), 
                                       labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                  "Open Vld", "N-VA", "Vlaams Belang"))

pf.flanders.seats.long$seats <- factor(pf.flanders.seats.long$seats,
                                       levels = c('3', '2', '1'), 
                                       labels = c("Fewer", "Same", "More"))

################################################################################
# NOTE:
# - The next block duplicates the seat-expectations long-format construction above.
# - It appears to be a retained draft/alternate version. It does not change results
#   if identical, but it is redundant and can be removed in a future refactor.
#
## 2.2.11. SEAT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

pf.flanders.data.seats <- pf.flanders.data %>% drop_na(c(seats.pvda,seats.groen,
                                                         seats.vooruit,seats.cdv,
                                                         seats.ovld,seats.nva,
                                                         seats.vb,govt.pvda))

# Reshape from wide to long
pf.flanders.seats.long <- gather(pf.flanders.data.seats, party, seats, seats.pvda:seats.vb, factor_key=TRUE)

# Remove NA values
pf.flanders.seats.long <- pf.flanders.seats.long %>% filter(!is.na(seats))
pf.flanders.seats.long <- pf.flanders.seats.long %>% filter(!is.na(party))

# Set levels and add value labels for new party column
pf.flanders.seats.long$party <- factor(pf.flanders.seats.long$party,
                                       levels = c('seats.pvda', 'seats.groen', 'seats.vooruit',
                                                  'seats.cdv', 'seats.ovld', 'seats.nva',
                                                  'seats.vb'), 
                                       labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                  "Open Vld", "N-VA", "Vlaams Belang"))

# Add value labels to answer categories for seats column
pf.flanders.seats.long$seats <- factor(pf.flanders.seats.long$seats,
                                       levels = c('3', '2', '1'), 
                                       labels = c("Fewer", "Same", "More"))

################################################################################
## 2.2.12. FIGURE: CHANGE IN SEATS ACCORDING TO POLITICIANS (2023 AND 2024)
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("pf_seats.png", units="in", width=10, height=7, res=1200)

pf.seats.g <- pf.flanders.seats.long %>% ggplot(aes(x=as.factor(seats), fill=as.factor(party)))+
  geom_bar(aes(y=..count../tapply(..count.., ..fill.., sum)[..fill..]), 
           position="dodge", alpha=.25, data = . %>% filter(survey == 2023), width = 0.9, show.legend = FALSE) +
  geom_bar(aes(y=..count../tapply(..count.., ..fill.., sum)[..fill..]), 
           position="dodge", data = . %>% filter(survey == 2024), width = 0.6, show.legend = FALSE) +
  ylab('% Respondents') +
  scale_y_continuous(limits=c(0,1), 
                     breaks=seq(0,1,.25), labels = scales::percent) + facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  scale_x_discrete("More or Fewer Seats?") +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + theme_bw()
#labs(caption = "\n2023 survey (semi-transparent bars): n=480. 2024 survey (solid bars): n=616.\n\nQuestion: Regardless of your own political affiliation, can you say for each of the parties below how likely you think it is that\nthey will have more or fewer seats in the Flemish Parliament after the next elections compared to the previous elections?")

pf.seats.g + 
  theme(axis.title.x=element_text(margin=margin(t=8, r=0, b=0, l=0), size = 13),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 13),
        axis.text.x=element_text(size = 12),
        axis.text.y=element_text(size = 12),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 12, hjust = 0, face = "italic"),
        plot.title = element_blank())

dev.off()

################################################################################
## 2.2.13. FIGURE: CHANGE IN SEATS ACCORDING TO POLITICIANS BY EDUCATION LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Set levels for education (3-category) variable
pf.flanders.seats.long$education3 <- factor(pf.flanders.seats.long$education3,
                                            levels = c("Low", "Moderate", "High"))

# Compute percentage of seat expectations for each party by education level
pf.pct.by.educ <- pf.flanders.seats.long %>%
  dplyr::filter(!is.na(seats), !is.na(education3), !is.na(party)) %>%
  dplyr::group_by(party, education3, seats) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  dplyr::group_by(party, education3) %>%
  dplyr::mutate(percentage = count / sum(count)) %>%
  ungroup()

# Create figure

png("pf_seats_by_educ.png", units="in", width=10, height=7, res=1200)

ggplot(pf.pct.by.educ, aes(x = seats, y = percentage, fill = education3)) +
  geom_col(position = "dodge", width = 0.8) +
  facet_wrap(~party, ncol = 4, labeller = labeller(party = seats_labels)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_manual(
    values = c(
      "Low" = "gray80",
      "Moderate" = "gray40",
      "High" = "black"
    )
  ) +
  labs(
    x = "More or Fewer Seats?",
    y = "% Respondents",
    fill = "Education Level") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")
  )

dev.off()

################################################################################
## 2.2.14. GOVERNMENT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expectations about parties entering government.
#
# Replication notes:
# - Item names differ across 2023 vs 2024 (q25_* vs Q25_*).
# - Later, the script shifts some government variables by -1 to align scales across waves.
#

# Create dataframe for government expectations
pf.flanders.data.govt <- pf.flanders.data %>% drop_na(c(govt.pvda,
                                                        govt.groen,govt.vooruit,
                                                        govt.cdv,govt.ovld,
                                                        govt.nva,govt.vb))

# Reshape from wide to long
pf.flanders.govt.long <- gather(pf.flanders.data.govt, party, govt, govt.pvda:govt.vb, factor_key=TRUE)

# Remove NAs
pf.flanders.govt.long <- pf.flanders.govt.long %>% filter(!is.na(govt))
pf.flanders.govt.long <- pf.flanders.govt.long %>% filter(!is.na(party))

# Set levels and add value labels for new party column
pf.flanders.govt.long$party <- factor(pf.flanders.govt.long$party,
                                      levels = c('govt.pvda', 'govt.groen', 'govt.vooruit',
                                                 'govt.cdv', 'govt.ovld', 'govt.nva',
                                                 'govt.vb'), 
                                      labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                 "Open Vld", "N-VA", "Vlaams Belang"))

################################################################################
## 2.2.15. FIGURE: PROB. OF BEING IN GOVT ACCORDING TO POLITICIANS (2023 AND 2024)
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("pf_govt.png", units="in", width=10, height=5, res=1200)

pf.govt.g <- pf.flanders.govt.long %>% ggplot(aes(x=reorder(party, govt), y=govt, fill=party)) + 
  geom_bar(stat="summary", fun="mean", alpha=.25, data = . %>% filter(survey == 2023), width = 0.9, show.legend = FALSE) +
  geom_bar(stat="summary", fun="mean", data = . %>% filter(survey == 2024), width = 0.6, show.legend = FALSE) +
  scale_y_continuous("Average Probability", breaks = seq(0,10,2)) +
  scale_x_discrete("") +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + theme_bw()

pf.govt.g + 
  theme(axis.title.x=element_blank(),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 15),
        axis.text.x=element_text(size = 14),
        axis.text.y=element_text(size = 14),
        plot.title = element_blank())

dev.off()

################################################################################
## 2.2. 15a. FIGURE: COMBINE CHANGE IN SEATS FIGURES
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

flanders.govt.long <- dplyr::bind_rows(cf.flanders.govt.long, pf.flanders.govt.long)

png("govt.png", units="in", width=10, height=5, res=1200)

govt.g <- flanders.govt.long %>% ggplot(aes(x=reorder(party, govt), y=govt, fill=party)) + 
  geom_bar(stat="summary", fun="mean", alpha=.25, data = . %>% filter(survey == 2023), width = 0.9, show.legend = FALSE) +
  geom_bar(stat="summary", fun="mean", data = . %>% filter(survey == 2024), width = 0.6, show.legend = FALSE) +
  scale_y_continuous("Average Probability", breaks = seq(0,10,2)) +
  scale_x_discrete("", labels = c("PVDA" = "PVDA", "Groen" = "Groen", "Vooruit" = "Vooruit*", 
                                  "Open Vld" = "Open Vld", "CD&V" = "CD&V*", 
                                  "N-VA" = "N-VA*", "Vlaams Belang" = "VB")) +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + facet_wrap(~type, ncol=2) + coord_flip() +
  #labs(caption = "\n2023 survey (semi-transparent bars): n=864 (citizens), n=468 (politicians).\n2024 survey (solid bars): n=840 (citizens), n=605 (politicians).\n\nQuestion: And can you also say for each of these parties how likely you think it is that they will be part of the next\nFlemish government?") +
  theme_bw() 

govt.g + 
  theme(axis.title.x=element_text(margin=margin(t=8, r=0, b=0, l=0), size = 14),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 14),
        axis.text.x=element_text(size = 13),
        #axis.text.x=element_text(size = 14, angle = 90, vjust = 0.5, hjust=1),
        axis.text.y=element_text(size = 13),
        strip.text = element_text(size = 13),
        plot.caption = element_text(size = 12, hjust = 0, face = "italic"),
        plot.title = element_blank())

dev.off()

################################################################################
## 2.2.16. FIGURE: AVERAGE GOVT PROB. ACCORDING TO POLITICIANS BY EDUCATION LEVEL
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

# Compute average expected probability of party being in government by education level
pf.avg.govt.educ <- pf.flanders.govt.long %>%
  dplyr::filter(!is.na(govt), !is.na(education3), !is.na(party)) %>%
  dplyr::group_by(party, education3) %>%
  dplyr::summarize(mean_govt = mean(govt), .groups = "drop")

# Create figure

png("pf_govt_by_educ.png", units = "in", width = 10, height = 7, res = 1200)

ggplot(pf.avg.govt.educ, aes(x = education3, y = mean_govt, fill = education3)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ party, ncol = 4, labeller = labeller(party = govt_labels)) +
  labs(
    x = "Education Level",
    y = "Average Probability"
  ) +
  scale_fill_manual(
    values = c("Low" = "gray80", "Moderate" = "gray40", "High" = "black")
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 0.8),
    strip.background = element_rect(fill = "gray90", colour = "black", size = 0.8),
    strip.text = element_text(size = 12, face = "plain"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 13, margin = margin(t = 12)),
    axis.title.y = element_text(size = 13, margin = margin(r = 12)),
    plot.title = element_blank(),
    legend.position = "none",
    legend.box = "horizontal",
    legend.margin = margin(t = 10),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.key.size = unit(0.4, "cm")  # or smaller if needed
  )

dev.off()

################################################################################
## 2.2.17. ANONYMIZED PARTIES AND AFFILIATION FOR SEAT EXPECTATIONS
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

# Create new anonymized columns
pf.flanders.seats.long$affiliation_anon <- pf.flanders.seats.long$affiliation
pf.flanders.seats.long$party_anon <- pf.flanders.seats.long$party

# Convert to character format
pf.flanders.seats.long$affiliation_anon <- as.character(pf.flanders.seats.long$affiliation_anon)
pf.flanders.seats.long$party_anon <- as.character(pf.flanders.seats.long$party_anon)

# Recode anonymized columns
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="CD&V"] <- "Party D"
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="Groen"] <- "Party B"
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="N-VA"] <- "Party F"
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="Open Vld"] <- "Party E"
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="PVDA"] <- "Party A"
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="Vooruit"] <- "Party C"
pf.flanders.seats.long$affiliation_anon[pf.flanders.seats.long$affiliation_anon=="Vlaams Belang"] <- "Party G"

pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="CD&V"] <- "Party D"
pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="Groen"] <- "Party B"
pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="N-VA"] <- "Party F"
pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="Open Vld"] <- "Party E"
pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="PVDA"] <- "Party A"
pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="Vooruit"] <- "Party C"
pf.flanders.seats.long$party_anon[pf.flanders.seats.long$party_anon=="Vlaams Belang"] <- "Party G"

# Set levels and add value labels for anonymized party column
pf.flanders.seats.long$party_anon <- factor(pf.flanders.seats.long$party_anon,
                                            levels = c('Party G', 'Party F', 'Party E', 'Party D', 'Party C', 'Party B', 'Party A'), 
                                            labels = c('Party G', 'Party F', 'Party E', 'Party D', 'Party C', 'Party B', 'Party A'))

################################################################################
## 2.2.18. FIGURE: POLITICIANS' SEAT EXPECTATIONS BY PARTY AFFILIATION
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("pf_seats_byparty.png", units="in", width=14, height=7, res=1200)

pf.seats.byparty.g <- pf.flanders.seats.long %>%
  subset(!is.na(affiliation_anon)) %>%
  ggplot(aes(party_anon)) +
  geom_bar(aes(fill=seats), stat="count", position="fill")+ 
  scale_y_continuous("Percentage of Respondents", labels=scales::label_percent(suffix = "")) +
  scale_fill_manual(values=c("#D4D4D4", "#909090", "black"))+
  labs(fill = "Seats") + 
  facet_wrap(~factor(affiliation_anon, levels=c("Party A", "Party B", "Party C", "Party D", 
                                                "Party E", "Party F", "Party G",
                                                "Other/Independent"),
                     labels=c("Party A", "Party B", "Party C", "Party D", 
                              "Party E", "Party F", "Party G",
                              "Other/Independent")), ncol = 4) + theme_bw()

pf.seats.byparty.g <- pf.seats.byparty.g +
  theme(axis.title.y=element_blank(),
        axis.title.x=element_text(margin=margin(t=8, r=0, b=0, l=0), size = 18),
        axis.text.y=element_text(size = 16),
        axis.text.x=element_text(size = 16),
        strip.text = element_text(size = 18),
        legend.text = element_text(size = 16),
        legend.title = element_text(size = 16),
        plot.title = element_blank())

pf.seats.byparty.g + coord_flip()

dev.off()

################################################################################
## 2.2.19. ANONYMIZED PARTIES AND AFFILIATION FOR GOVERNMENT EXPECTATIONS
################################################################################
#
# Purpose:
# - Create harmonized variables representing expectations about parties entering government.
#
# Replication notes:
# - Item names differ across 2023 vs 2024 (q25_* vs Q25_*).
# - Later, the script shifts some government variables by -1 to align scales across waves.
#

# Create new anonymized columns
pf.flanders.govt.long$affiliation_anon <- pf.flanders.govt.long$affiliation
pf.flanders.govt.long$party_anon <- pf.flanders.govt.long$party

# Convert to character format
pf.flanders.govt.long$affiliation_anon <- as.character(pf.flanders.govt.long$affiliation_anon)
pf.flanders.govt.long$party_anon <- as.character(pf.flanders.govt.long$party_anon)

# Recode anonymized columns
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="CD&V"] <- "Party D"
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="Groen"] <- "Party B"
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="N-VA"] <- "Party F"
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="Open Vld"] <- "Party E"
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="PVDA"] <- "Party A"
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="Vooruit"] <- "Party C"
pf.flanders.govt.long$affiliation_anon[pf.flanders.govt.long$affiliation_anon=="Vlaams Belang"] <- "Party G"

pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="CD&V"] <- "Party D"
pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="Groen"] <- "Party B"
pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="N-VA"] <- "Party F"
pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="Open Vld"] <- "Party E"
pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="PVDA"] <- "Party A"
pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="Vooruit"] <- "Party C"
pf.flanders.govt.long$party_anon[pf.flanders.govt.long$party_anon=="Vlaams Belang"] <- "Party G"

# Set levels and add value labels for anonymized party column
pf.flanders.govt.long$party_anon <- factor(pf.flanders.govt.long$party_anon,
                                           levels = c('Party G', 'Party F', 'Party E', 'Party D', 'Party C', 'Party B', 'Party A'), 
                                           labels = c('Party G', 'Party F', 'Party E', 'Party D', 'Party C', 'Party B', 'Party A'))

################################################################################
## 2.2.20. FIGURE: POLITICIANS' GOVERNMENT EXPECTATIONS BY PARTY AFFILIATION
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("pf_govt_byparty.png", units="in", width=14, height=7, res=1200)

pf.govt.byparty.g <- pf.flanders.govt.long %>%
  subset(!is.na(affiliation_anon)) %>%
  ggplot(aes(x = party_anon, y = govt)) +
  geom_bar(stat = "summary", fun = mean) +
  scale_y_continuous("Probability", limits=c(0,1), 
                     breaks=seq(0,10,2)) +
  facet_wrap(~factor(affiliation_anon, levels=c("Party A", "Party B", "Party C", "Party D", 
                                                "Party E", "Party F", "Party G",
                                                "Other/Independent"),
                     labels=c("Party A", "Party B", "Party C", "Party D", 
                              "Party E", "Party F", "Party G",
                              "Other/Independent")), ncol = 4) + theme_bw()

pf.govt.byparty.g <- pf.govt.byparty.g +
  theme(axis.title.y=element_blank(),
        axis.title.x=element_text(margin=margin(t=8, r=0, b=0, l=0), size = 18),
        axis.text.y=element_text(size = 16),
        axis.text.x=element_text(size = 16),
        strip.text = element_text(size = 18),
        legend.text = element_text(size = 16),
        legend.title = element_text(size = 16),
        plot.title = element_blank())

pf.govt.byparty.g + coord_flip()

dev.off()

################################################################################
## 2.3. MERGED CITIZEN AND POLITICIAN DATA
################################################################################
#
# Purpose:
# - Combine citizen and politician long datasets for side-by-side comparison plots.
#
# Replication notes:
# - This section assumes the citizen and politician long data objects were created above.
# - If you change party labels or factor order, revisit both sections for consistency.
#

################################################################################
## 2.3.1. SEAT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expected seat change by party.
#
# Replication notes:
# - Note that item names differ across 2023 vs 2024 (q24_* vs Q24_*).
# - Downstream code assumes these derived variables exist with consistent names (seats.<party>).
#

# Merge dataframes
flanders.seats.long <- bind_rows(cf.flanders.seats.long, pf.flanders.seats.long)

# Create new column (0 = citizens, 1 = politicians)
flanders.seats.long$politicians[flanders.seats.long$type=="(a) According to Citizens:"] <- 0
flanders.seats.long$politicians[flanders.seats.long$type=="(b) According to Politicians:"] <- 1

################################################################################
## 2.3.2. FIGURE: CHANGE IN SEATS ACCORDING TO CITIZENS AND POLITICIANS
################################################################################
#
# Purpose:
# - Generate a publication-ready figure for the descriptives section.
#
# Replication notes:
# - If you save to disk, ensure an output folder exists and use a deterministic filename.
# - For exact reproduction, record package versions and any font/theme settings.
#

png("cf_pf_seats.png", units="in", width=10, height=7, res=1200)

cf.pf.seats.g <- flanders.seats.long %>% ggplot(aes(x=as.factor(seats), fill=as.factor(party)))+
  geom_bar(aes(y=..count../tapply(..count.., ..fill.., sum)[..fill..]), 
           position="dodge", alpha=.25, data = . %>% filter(politicians == 0), width = 0.9, show.legend = FALSE) +
  geom_bar(aes(y=..count../tapply(..count.., ..fill.., sum)[..fill..]), 
           position="dodge", data = . %>% filter(politicians == 1), width = 0.6, show.legend = FALSE) +
  ylab('% Respondents') +
  scale_y_continuous(limits=c(0,1), 
                     breaks=seq(0,1,.25), labels = scales::percent) + facet_wrap(~party, ncol = 4) +
  scale_x_discrete("More or Fewer Seats?") +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + theme_bw()

cf.pf.seats.g + 
  theme(axis.title.x=element_text(margin=margin(t=8, r=0, b=0, l=0), size = 13),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 13),
        axis.text.x=element_text(size = 12),
        axis.text.y=element_text(size = 12),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 12, hjust = 0, face = "italic"),
        plot.title = element_blank())

dev.off()

################################################################################
## 2.3.3. GOVERNMENT EXPECTATIONS DATAFRAME
################################################################################
#
# Purpose:
# - Create harmonized variables representing expectations about parties entering government.
#
# Replication notes:
# - Item names differ across 2023 vs 2024 (q25_* vs Q25_*).
# - Later, the script shifts some government variables by -1 to align scales across waves.
#

# Merge dataframes
flanders.govt.long <- bind_rows(cf.flanders.govt.long, pf.flanders.govt.long)

flanders.govt.long$politicians[flanders.govt.long$type=="(a) According to Citizens:"] <- 0
flanders.govt.long$politicians[flanders.govt.long$type=="(b) According to Politicians:"] <- 1

png("cf_pf_govt.png", units="in", width=10, height=5, res=1200)

cf.pf.govt.g <- flanders.govt.long %>% ggplot(aes(x=reorder(party, govt), y=govt, fill=party)) + 
  geom_bar(stat="summary", fun="mean", alpha=.25, data = . %>% filter(politicians == 0), width = 0.9, show.legend = FALSE) +
  geom_bar(stat="summary", fun="mean", data = . %>% filter(politicians == 1), width = 0.6, show.legend = FALSE) +
  scale_y_continuous("Average Probability", limits=c(0,1), breaks = seq(0,1,.2)) +
  scale_x_discrete("") +
  scale_fill_manual(values=c("#990000","#04847A","#FF0000","#FF6000", "#005DAA", 
                             "#FCBD1B","black")) + theme_bw()

cf.pf.govt.g + 
  theme(axis.title.x=element_blank(),
        axis.title.y=element_text(margin=margin(t=0, r=8, b=0, l=0), size = 15),
        axis.text.x=element_text(size = 14),
        axis.text.y=element_text(size = 14),
        plot.title = element_blank())

dev.off()



# List of target columns
cols <- c("seats.pvda", "seats.groen", "seats.vooruit", 
          "seats.cdv", "seats.ovld", "seats.nva", "seats.vb")

# ----- (a) Grouped by survey -----
cf_grouped_pct <- cf.flanders.data %>%
  dplyr::select(survey, dplyr::all_of(cols)) %>%
  tidyr::pivot_longer(-survey, names_to = "party", values_to = "seats") %>%
  dplyr::filter(!is.na(seats), seats != -99) %>%
  dplyr::group_by(survey, party, seats) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::group_by(survey, party) %>%
  dplyr::mutate(percentage = round(100 * n / sum(n), 2)) %>%
  dplyr::ungroup()

# ----- (b) Overall (not grouped by survey) -----
cf_overall_pct <- cf.flanders.data %>%
  dplyr::select(dplyr::all_of(cols)) %>%
  tidyr::pivot_longer(dplyr::everything(), names_to = "party", values_to = "seats") %>%
  dplyr::filter(!is.na(seats), seats != -99) %>%
  dplyr::group_by(party, seats) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::group_by(party) %>%
  dplyr::mutate(percentage = round(100 * n / sum(n), 2)) %>%
  dplyr::ungroup()

# ----- Write to Excel -----
writexl::write_xlsx(
  list(
    "Grouped_by_Survey" = cf_grouped_pct,
    "Overall" = cf_overall_pct
  ),
  path = "cf_seats_percentage_summary.xlsx"
)

# ----- (a) Grouped by survey -----
pf_grouped_pct <- pf.flanders.data %>%
  dplyr::select(survey, dplyr::all_of(cols)) %>%
  tidyr::pivot_longer(-survey, names_to = "party", values_to = "seats") %>%
  dplyr::filter(!is.na(seats), seats != -99) %>%
  dplyr::group_by(survey, party, seats) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::group_by(survey, party) %>%
  dplyr::mutate(percentage = round(100 * n / sum(n), 2)) %>%
  dplyr::ungroup()

# ----- (b) Overall (not grouped by survey) -----
pf_overall_pct <- pf.flanders.data %>%
  dplyr::select(dplyr::all_of(cols)) %>%
  tidyr::pivot_longer(dplyr::everything(), names_to = "party", values_to = "seats") %>%
  dplyr::filter(!is.na(seats), seats != -99) %>%
  dplyr::group_by(party, seats) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::group_by(party) %>%
  dplyr::mutate(percentage = round(100 * n / sum(n), 2)) %>%
  dplyr::ungroup()

# ----- Write to Excel -----
writexl::write_xlsx(
  list(
    "Grouped_by_Survey" = pf_grouped_pct,
    "Overall" = pf_overall_pct
  ),
  path = "pf_seats_percentage_summary.xlsx"
)


# List of target columns
govt_cols <- c("govt.pvda", "govt.groen", "govt.vooruit", 
               "govt.cdv", "govt.ovld", "govt.nva", "govt.vb")

# ----- (a) Averages grouped by survey -----
cf_grouped_avg <- cf.flanders.data %>%
  dplyr::select(survey, dplyr::all_of(govt_cols)) %>%
  tidyr::pivot_longer(-survey, names_to = "party", values_to = "value") %>%
  dplyr::filter(!is.na(value), value != -99) %>%
  dplyr::group_by(survey, party) %>%
  dplyr::summarise(avg = round(mean(value), 2), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = party, values_from = avg)

# ----- (b) Overall averages (not grouped) -----
cf_overall_avg <- cf.flanders.data %>%
  dplyr::select(dplyr::all_of(govt_cols)) %>%
  tidyr::pivot_longer(everything(), names_to = "party", values_to = "value") %>%
  dplyr::filter(!is.na(value), value != -99) %>%
  dplyr::group_by(party) %>%
  dplyr::summarise(avg = round(mean(value), 2), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = party, values_from = avg)

# ----- Write to Excel -----
writexl::write_xlsx(
  list(
    "Govt_Avg_By_Survey" = cf_grouped_avg,
    "Govt_Overall_Avg" = cf_overall_avg
  ),
  path = "cf_govt_average_summary.xlsx"
)

# ----- (a) Averages grouped by survey -----
pf_grouped_avg <- pf.flanders.data %>%
  dplyr::select(survey, dplyr::all_of(govt_cols)) %>%
  tidyr::pivot_longer(-survey, names_to = "party", values_to = "value") %>%
  dplyr::filter(!is.na(value), value != -99) %>%
  dplyr::group_by(survey, party) %>%
  dplyr::summarise(avg = round(mean(value), 2), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = party, values_from = avg)

# ----- (b) Overall averages (not grouped) -----
pf_overall_avg <- pf.flanders.data %>%
  dplyr::select(dplyr::all_of(govt_cols)) %>%
  tidyr::pivot_longer(everything(), names_to = "party", values_to = "value") %>%
  dplyr::filter(!is.na(value), value != -99) %>%
  dplyr::group_by(party) %>%
  dplyr::summarise(avg = round(mean(value), 2), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = party, values_from = avg)

# ----- Write to Excel -----
writexl::write_xlsx(
  list(
    "Govt_Avg_By_Survey" = pf_grouped_avg,
    "Govt_Overall_Avg" = pf_overall_avg
  ),
  path = "pf_govt_average_summary.xlsx"
)

# =============================================================================
# END OF SCRIPT
# =============================================================================